10-13-2017
library(tidyverse)
library(lubridate)
library(stringr)
us <- read_csv("scrubbed.csv.zip") %>%
mutate(
date_time = mdy_hm(datetime),
year_sighted = year(date_time)
) %>%
filter(country == "us", year_sighted < 2014) # 2014 is incomplete
Are demographic characteristics related to the number of reported sightings?
Is it just population density?
Drunk people?
Bigfoot sightings, 1921-2013 (Joshua Stevens @jscarto)
tidycensus and the sf package to get our population datalibrary(tidycensus)
library(sf)
# sign up at: https://api.census.gov/data/key_signup.html
# hiding my key, but calling census_api_key() to enable requests
source("census-api-key.R")
# set up the point data
us$geometry <- 1:nrow(us) %>%
lapply(function(x) st_point(c(us$longitude[x], us$latitude[x]))) %>%
st_sfc(crs = 4269)
# fortify the tbl
us2 <- st_sf(us)
get_acs(): primary workhorse for ACS dataget_decennial()load_variables(): retrieve table of variables you're wanting to pullmoe_ functions to calculate margin of errorglimpse(load_variables(2013, "acs5"))
## Observations: 45,388 ## Variables: 3 ## $ name <chr> "AIANHH", "AIHHTLI", "AITS", "AITSCE", "ANRC", "B00001... ## $ label <chr> "FIPS AIANHH code", "American Indian Trust Land/Hawaii... ## $ concept <chr> "Selectable Geographies", "Selectable Geographies", "S...
countydat <-
get_acs(
geography = "county",
variables = c("B17006_001", "B17006_002"),
output = "wide",
geometry = TRUE
) %>%
mutate(area = st_area(geometry)) %>%
st_transform(4269) %>%
st_join(us2, join = st_contains) # here we match each point into their county
class(countydat)
## [1] "sf" "data.frame"
geometry = TRUE ensures that the TIGRIS files that provide the outlines of each county are returned with the query# drop the st class here, then create a count of sightings per county
rates <- countydat %>%
as.data.frame %>%
count(`NAME`) %>%
right_join(
countydat %>%
select(NAME, B17006_001E, area, geometry) %>%
distinct
) %>%
mutate(
d = B17006_001E / as.numeric(area / 1000000), # <-- gotta make sure it's km^2
dm = Hmisc::cut2(d, g = 3, levels.mean = TRUE), # next we cut up the distribution
nm = Hmisc::cut2(n, g = 3, levels.mean = TRUE) # on both vars, for bivariate comparison
)
levels(rates$dm) <- 1:3
levels(rates$nm) <- 1:3
rates$bin <- str_c(rates$dm, "-", rates$nm) %>%
factor(levels = c(
"3-1", "2-1", "1-1", # col 1 -->
"3-2", "2-2", "1-2", # col 2 -->
"3-3", "2-3", "1-3" # col 3 -->
))
# from colorbrewer
vals <- c(
"#8c510a", "#bf812d", "#dfc27d", # col 1 -->
"#f6e8c3", "#f5f5f5", "#c7eae5", # col 2 -->
"#80cdc1", "#35978f", "#01665e" # col 3 -->
)
library(leaflet) # set up a palette for leaflet pal <- colorFactor(palette = rev(vals), rates$bin)
leaflet works well with sf class data framesCan compose maps with lines, tiles, markers (points), and polygons
leaflet() to initialze a leaflet plot%>% operatoraddPolygons() to make use of geometry data we have in a sf data frame
bv_choro <- rates %>%
filter(!str_detect(NAME, "Alaska|Hawaii|Puerto")) %>%
st_sf() %>%
st_transform(crs = "+proj=longlat +datum=WGS84") %>%
leaflet() %>%
addPolygons( # uses geometry features to draw counties
color = "black", # popups will be highlighted and contain data on n of sightings & density
popup = ~glue_data(., "{NAME}; Sightings: {n}, Density: {round(d, 2)}"),
stroke = TRUE,
weight = 1,
smoothFactor = 0.2,
fillOpacity = 1,
fillColor = ~pal(bin),
highlightOptions = highlightOptions(color = "white", weight = 2, bringToFront = TRUE)
)